####################################################################################
### Political leadership has limited impact on fossil fuel taxes and subsidies   ###
### Code to replicate permutation-based analysis                                 ###
### This version: October 29, 2022                                               ###
####################################################################################

#---------------------------------------------------#
# Preliminaries                                     #
#---------------------------------------------------#

rm(list = ls())

#List of packages for session
.packages = c("purrrallel","parallel", "tidyverse", "estimatr", "estimatr", "splitstackshape",
              "lubridate")

#Install CRAN packages (if not already installed)
.inst = .packages %in% installed.packages()
if(length(.packages[!.inst]) > 0) install.packages(.packages[!.inst])

#Loading packages into session 
lapply(.packages, require, character.only=TRUE)

# Set Working Directory to wherever files are downloaded
# setwd("")

#---------------------------------------------------#
# Loading Data                                      #
#---------------------------------------------------#

#Main datasets for regression analysis
load("ffs-leaders-2022-10-29.RData")

#######################################
############ All Countries ############                     
#######################################

#Step 1: determining what countries have only 1 leader
oneleadercountry <- censdat %>% group_by(country2) %>% summarise(n_lead = n_distinct(obsid)) %>% filter(n_lead > 1)

#We are removing the following countries with only one leader: AGO, BLR, CMR, 
#COG, GMB, GNQ, KAZ, LBR, MMR, OMN, RWA, SDN, SWZ, SYR, TCD, TJK, UGA, UZB, ZWE

#Step 2: subsetting to relevant variables and preparing for function
data_try <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  2.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 2.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

#Setting the seed
set.seed(45123)

#Function

null_bycountry <- function(){
  data_try %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(data_try, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}

cl <- makeCluster(8)
clusterEvalQ(cl, {library(dplyr); library(tidyr); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry")
registerCluster(cl, nblock = 8)


system.time({
  null_distribution_bycountry <- rerun(1000, null_bycountry()) 
})

dat <- purrr::reduce(null_distribution_bycountry, bind_rows)

stopCluster(cl)
registerCluster()

#####################################
############ Democracies ############                     
#####################################

democs <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(democracy == 1) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_democracies <- function(){
  democs %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(democs, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(8)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_democracies")
registerCluster(cl, nblock = 8)


system.time({
  null_distribution_bycountry_democracies <- rerun(1000, null_bycountry_democracies()) 
})

dat_democs <- purrr::reduce(null_distribution_bycountry_democracies, bind_rows)

stopCluster(cl)
registerCluster()

#####################################
############ Autocracies ############                     
#####################################

autocs <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(democracy == 0) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_autocracies <- function(){
  autocs %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(autocs, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(8)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_autocracies")
registerCluster(cl, nblock = 8)


system.time({
  null_distribution_bycountry_autocracies <- rerun(1000, null_bycountry_autocracies()) 
})

dat_autocs <- purrr::reduce(null_distribution_bycountry_autocracies, bind_rows)

stopCluster(cl)
registerCluster()

##################################################
############ Presidential Democracies ############                     
##################################################

presid <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(democracy == 1, parliamentary == 0) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_presidential <- function(){
  presid %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(presid, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(4)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_presidential")
registerCluster(cl, nblock = 4)


system.time({
  null_distribution_bycountry_presidential <- rerun(1000, null_bycountry_presidential()) 
})

dat_presid <- purrr::reduce(null_distribution_bycountry_presidential, bind_rows)

stopCluster(cl)
registerCluster()

##################################################
############ Parliamentary Democracies ###########                     
##################################################

parl <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(democracy == 1, parliamentary == 2) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_parliamentary <- function(){
  parl %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(parl, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(4)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_parliamentary")
registerCluster(cl, nblock = 4)


system.time({
  null_distribution_bycountry_parliamentary <- rerun(1000, null_bycountry_parliamentary()) 
})

dat_parl <- purrr::reduce(null_distribution_bycountry_parliamentary, bind_rows)

stopCluster(cl)
registerCluster()

#######################################
############ Oil Exporters ############                     
#######################################

exporters <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(oil == 1) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_oilexporters <- function(){
  exporters %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(exporters, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(4)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_oilexporters")
registerCluster(cl, nblock = 4)


system.time({
  null_distribution_bycountry_oilexporters <- rerun(1000, null_bycountry_oilexporters()) 
})

dat_exports <- purrr::reduce(null_distribution_bycountry_oilexporters, bind_rows)

#######################################
############ Oil Importers ############                     
#######################################

importers <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(oil == 0) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_oilimporters <- function(){
  importers %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(importers, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(7)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_oilimporters")
registerCluster(cl, nblock = 7)


system.time({
  null_distribution_bycountry_oilimporters <- rerun(1000, null_bycountry_oilimporters()) 
})

dat_imports <- purrr::reduce(null_distribution_bycountry_oilimporters, bind_rows)

stopCluster(cl)
registerCluster()

################################################
############ Persistent Subsidizers ############                     
################################################

fixednonpersistent <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(fixedprice == 1 & persistent == 0) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_fixednonpersistent <- function(){
  fixednonpersistent %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(fixednonpersistent, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(8)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_fixednonpersistent")
registerCluster(cl, nblock = 8)


system.time({
  null_distribution_bycountry_fixednonpersistent <- rerun(1000, null_bycountry_fixednonpersistent()) 
})

dat_fnps <- purrr::reduce(null_distribution_bycountry_fixednonpersistent, bind_rows)

stopCluster(cl)
registerCluster()

###########################################
############ Persistent Taxers ############                     
###########################################

persistentsubs <- censdat %>%
  dplyr::select(country2,bmgap2015adj,time_trend,leader_id, obsid, leader, year_month,
                democracy, parliamentary, fixedprice, persistent, oil) %>%
  filter(persistent == 1) %>%
  filter(country2 %in% oneleadercountry$country2) %>%
  #Step  5.1: convert variables to factors to remove levels and to character in months
  mutate(obsid = factor(obsid),
         leader = factor(leader),
         year_month = as.character(year_month)) %>%
  group_by(country2) %>%
  #Step 5.2: create a variable to merge the simulated data with the actual data
  mutate(country_month = row_number())

null_bycountry_persistent <- function(){
  persistentsubs %>%
    group_by(country2) %>%
    #Create a column that stores the first month in which the country appears in the data
    #This column is called "year_month"
    mutate(year_month = ifelse(row_number()==1, year_month, NA),
           year_month = as.Date(paste(year_month, "01", sep = "-")),
           year_month = zoo::na.locf(year_month)) %>%
    #Summarise by calculating the length of each leader-tenure
    group_by(country2, obsid, year_month) %>%
    summarise(tenurelengths = as.character(n())) %>%
    ungroup() %>%
    #Then, sample within country to scramble the length of tenures (randomly redistribute tenure lengths)
    group_by(country2) %>%
    #Once a tenure length was sampled, expand the dataset
    mutate(scrambled_tenure_lengths = base::sample(x = as.character(tenurelengths), size = length(tenurelengths), replace = F),
           scrambled_tenure_lengths = as.numeric(scrambled_tenure_lengths)) %>%
    #Then, we expand the rows to correspond to the number of months per tenure
    #I just confirmed that this expansion of rows gives the exact same number of observations per country
    expandRows(count = "scrambled_tenure_lengths", drop = FALSE) %>%
    #Create the column of months starting with the first month a country is in the fake dataset 
    #The key here is to merge by the ranking of the month itself
    mutate(country_month = row_number()) %>%
    dplyr::select(country2, country_month, obsid, tenurelengths, scrambled_tenure_lengths) %>%
    right_join(persistentsubs, by = c("country2", "country_month")) %>%
    rename(fake_leader = obsid.x) %>%
    #Final step: create the fake time trend
    group_by(country2, fake_leader,scrambled_tenure_lengths) %>%
    mutate(fake_time_trend = row_number()) %>%
    #Run the model
    lm(bmgap2015adj ~ factor(country2) + fake_leader*fake_time_trend, data = .) %>%
    glance()
}


cl <- makeCluster(8)
clusterEvalQ(cl, {library(dplyr); library(tidyverse); library(estimatr); library(splitstackshape)})
clusterExport(cl, "null_bycountry_persistent")
registerCluster(cl, nblock = 8)


system.time({
  null_distribution_bycountry_persistent <- rerun(1000, null_bycountry_persistent()) 
})

dat_ps <- purrr::reduce(null_distribution_bycountry_persistent, bind_rows)

stopCluster(cl)
registerCluster()

#Note: the final results of the simulation will be slightly different depending on the seed selected.
